#################################################################
# The functions contained in this file implements               #
# the cross-validated median fit (CVMF) test                    #
# in Desmarais and Harden (Forthcoming, Political Analysis).    #
# The computation of this test statistic may                    #
# take several minutes. Given N observations in                 #
# the data, the Cox model must be estimated N                   #
# times by both PLM and IRR (2N estimations).                   #
#################################################################


CVMF <- function(formula,data,method="breslow",trunc=0.95){
## "formula" is a standard R formula representation of the model to be
# estimated, y ~ x1 + x2 + ... + xk. Note that y must be a "Surv" object
## "data" is the data frame containing the variables
## "method" is the algorithm for handling ties in the PLM. Note that 
# only the Breslow method is available in the coxrobust implementation of IRR.
## "trunc" the proportion of observations to recveive positive weight in 
# the next round of IRR (i.e., M in Desmarais and Harden (XXXX)).

# Load standard survival analysis package
require(survival)
# Load package for IRR
require(coxrobust)

# Estimate PLM
plm <- coxph(formula,data=data,method=method,y=T,x=T)
surv <- plm$y
x <- plm$x

# Estimate IRR
irr <- coxr(formula, data=data,trunc=trunc)

# Prep for cross-validation
n <- nrow(x)
Cvll.r <- numeric(n)
Cvll.c <- numeric(n)

# Loop through for cross-validation
for(i in 1:n){
	# remove current observation
	ind <- i
	survi <- surv[-ind,]
	xi <- as.matrix(x[-ind,])
	
	# estimate models without i
	esti <- coxr(survi~xi, trunc=trunc)
	pesti <- coxph(survi~xi,method=method)
	
	# Check if any parameters were undefined without observation i
	# This can happen with very sparse and/or very many covariates (e.g., fixed effect dummies)
	na.ind <- which(is.na(pesti$coefficients))
	
	# Extract coefficients and covariates
	coef.p <- pesti$coefficients
	x.p <- x
	xi.p <- xi

	# Remove any covariates with undefined effects
	if(length(na.ind) >0){
		coef.p <- pesti$coefficients[-na.ind]
		x.p <- x[,-na.ind]
		xi.p <- xi[,-na.ind]
	}


	# Compute the full and restrcicted partial likelihoods
	full.ll.r <- coxph(surv~ offset(as.matrix(x)%*%cbind(esti$coefficients)), method=method)$loglik
	full.ll.c <- coxph(surv~ offset(as.matrix(x.p)%*%cbind(coef.p)), method=method)$loglik
	esti.ll.r <- coxph(survi~ offset(as.matrix(xi)%*%cbind(esti$coefficients)), method=method)$loglik
	esti.ll.c <- coxph(survi~ offset(as.matrix(xi.p)%*%cbind(coef.p)), method=method)$loglik

	# Store
	Cvll.r[i] <- full.ll.r-esti.ll.r
	Cvll.c[i] <- full.ll.c-esti.ll.c
	}

# Compute the test
cvmf <- binom.test(sum(Cvll.r>Cvll.c),n,alternative="two.sided")
best <- ifelse(cvmf$statistic > n/2,"IRR","PLM")
p <- round(cvmf$p.value,digits=3)
res_sum <- cat(best," supported with a two-sided p-value of ",p,sep="", "\n")

# Construct the returned object
obj <- list(res_sum=res_sum,irr=irr,plm=plm,cvmf=cvmf,cvpl.irr=Cvll.r,cvpl.plm=Cvll.c)
# "irr" is the IRR object from coxr()
# "plm" is the PLM object from coxph()
# "cvmf" is the CVMF - two.sided binomial test, with higher values favoring IRR
# "cvpl.irr" is the cross-validated partial likelihood for IRR
# "cvpl.plm" is the cross-validated partial likelihood for PLM

res_sum
obj

}

### Toy Example. Delete the "#" before each line to run ##
## Read in survival library
# require(survival)
#
## Set the seed for replication purposes
# set.seed(12345)
#
# Create two covariates with measurement error in the second
# x1 <- rnorm(100)
# x2 <- rnorm(100)
# x2e <- x2 + rnorm(100, 0, 0.5)

## Create the dependent variable with the unobserved x2 (no measurement error)
## Each coefficient has a true value of 1 
# y <- rexp(100, exp(x1 + x2))
# y <- Surv(y)
#
## Put the observed variables into a data frame
# dat <- data.frame(y, x1, x2e)
#
## Define the formula
# form <- y ~ x1 + x2e
#
# results <- CVMF(formula = form, data = dat)

#
## Take a look at results
# results$irr
## Now the test
# results$cvmf






